{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train : (1456, 252)\n",
      "test : (1459, 252)\n"
     ]
    }
   ],
   "source": [
    "# Required packages\n",
    "# Data loading and basic processing\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "# Models\n",
    "from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV, ElasticNetCV\n",
    "\n",
    "# Model evaluation\n",
    "from sklearn.metrics import mean_squared_error\n",
    "\n",
    "# Visualisation\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Read the train / test CSV tables\n",
    "train = pd.read_csv(\"AmesHouse_FE_train.csv\")\n",
    "test = pd.read_csv(\"AmesHouse_FE_test.csv\")\n",
    "\n",
    "# Report the (rows, columns) shape of each table\n",
    "for prefix, frame in ((\"train : \", train), (\"test : \", test)):\n",
    "    print(prefix + str(frame.shape))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prepare the training data: separate the SalePrice target from the feature columns\n",
    "y_train = train[\"SalePrice\"]\n",
    "X_train = train.drop(\"SalePrice\", axis = 1)\n",
    "\n",
    "# The test table has an Id column: drop it from the features and keep it aside\n",
    "# so it can be written next to the predictions later\n",
    "X_test = test.drop(\"Id\", axis = 1)\n",
    "test_Id = test[\"Id\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "# Standardise the target: subtract its mean and divide by its standard deviation.\n",
    "# mean_y / std_y are kept so predictions can be mapped back to the price scale later.\n",
    "#\n",
    "# The statistics are always computed from the raw train[\"SalePrice\"] column rather\n",
    "# than from y_train, which this cell overwrites. That keeps the cell idempotent:\n",
    "# running it twice no longer replaces mean_y / std_y with the statistics of the\n",
    "# already-standardised target (about 0 and 1), which would break the inverse transform.\n",
    "mean_y = train[\"SalePrice\"].mean()\n",
    "std_y = train[\"SalePrice\"].std()\n",
    "y_train = (train[\"SalePrice\"] - mean_y) / std_y"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 最小二乘线性回归"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "RMSE on Training set : 0.26926777282231273\n"
     ]
    }
   ],
   "source": [
    "# Ordinary least squares: create the estimator and fit it on the training set\n",
    "lr = LinearRegression().fit(X_train, y_train)\n",
    "\n",
    "# Predict on the training data itself to obtain the training error\n",
    "# (this step is not needed in a real task)\n",
    "y_train_pred = lr.predict(X_train)\n",
    "mse_train = mean_squared_error(y_train, y_train_pred)\n",
    "rmse_train = np.sqrt(mse_train)\n",
    "\n",
    "print(\"RMSE on Training set :\", rmse_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "RMSE on Training set : 0.2695070585615205\n"
     ]
    }
   ],
   "source": [
    "from sklearn.linear_model import SGDRegressor\n",
    "\n",
    "# Linear regression trained by stochastic gradient descent, using the default\n",
    "# configuration except for a larger iteration budget.\n",
    "# SGD shuffles the training samples randomly, so the seed is fixed to make the\n",
    "# reported RMSE reproducible after a kernel restart.\n",
    "sgdr = SGDRegressor(max_iter = 5000, random_state = 0)\n",
    "\n",
    "# Estimate the model parameters from the training data\n",
    "sgdr.fit(X_train, y_train)\n",
    "\n",
    "# Predict on the training set and report the training RMSE\n",
    "y_train_pred = sgdr.predict(X_train)\n",
    "\n",
    "rmse_train = np.sqrt(mean_squared_error(y_train, y_train_pred))\n",
    "\n",
    "print(\"RMSE on Training set :\", rmse_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "使用最小二乘解析求解的效果不是很好。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## L2 正则化（岭回归 Ridge）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best alpha : 10.0\n",
      "cv of rmse : 0.31405648167074857\n",
      "RMSE on Training set : 0.276363034926764\n",
      "Ridge picked 229 features and eliminated the other 22 features\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAEICAYAAABWJCMKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xm8HFWZ//HPF8IiOyGsISGgLAFlUWBwUEFQEGRTBwQRIyAMTlRABCK4RB2dKItkBueHyJIwIIKAiixCjAKigCYxIYEga4SQmITVsAgEnt8f51zpdLrvrdvVffve9Pf9euV1q6tOVT23+qZP1zl1nqOIwMzMOs8K7Q7AzMzawxWAmVmHcgVgZtahXAGYmXUoVwBmZh3KFYCZWYdyBWD/JGlrSX+WtFjSFyS9RdIvJT0v6aeSjpR0a4HjnCHpor6IuZsYhkt6QdKKTTreWEmXN+NYBc7V7XWWdJukz/RFLM3Qm3glhaS3tTomS1wBDECSPiFpSv6Amy/pZknvacKhTwNui4g1I+K/gX8DNgTWi4hDI+KKiNinp4NExHciovQHlKQR+QNhUG/3jYjHI2KNiHi9gfPuKWlub/frxfEnSHo1v3/PSJokaZuu7UWvcwviGpuv9xeq1p+U14/t65istVwBDDCSvgicB3yH9OE8HPhf4OAmHH4z4L6q1w9GxJImHNuW9r2IWAMYCjwJXNzmeLo8CIyqWvepvN6WM64ABhBJawPfBEZHxHUR8WJEvBYRv4yIU3OZVSSdJ2le/neepFUqjnGApOmSnpP0B0nb5/W/Ad4PnJ+/mV4JfA34eH59rKRPS7qz4ljb5W+vz0haIOmMvH6p5hJJu+VzPSdphqQ9K7bdJulbkn6fm55ulTQkb74j/3wux/BuSW+TdHtulnpK0lV1rtVSdw89nKdyv9WBm4FN8jlfkLRJ3ryypMvy/vdJ2rliv00kXStpkaTHqr9F1xMRLwNXAztWHKv6On9Q0gP5dz4fUMW2FSWdk6/FY5I+V/V7ry3p4nyn+KSk/+yhWexPwGqStsv7bwe8Ja+vvE7HSXo4v/fXV1yjbuPN24+RNFvSs5JukbRZkWtlzecKYGB5N7Aq8LNuypwJ7Eb6QNkB2BX4CoCkdwKXAP8OrAf8ELhe0ioRsRfwO+BzuenkCNJdxlX59VLfUCWtCfwa+BWwCfA2YHJ1MJKGAjcC/wkMBr4EXCtp/YpinwCOBjYAVs5lAN6Xf66TY7gL+BZwK7AusCnwP91ci2r1zvNPEfEisB8wL59zjYiYlzcfBPwEWAe4Hjg//44rAL8EZpC+0e8NnCRp354CyhXOEcDDdbYPAa4lvYdDgEeA3SuKHJfj3RF4J3BI1SEmAktI789OwD5AT81z/0f61g/pbuCyqpj2Av4LOAzYGPgr6br0GK+kQ4AzgI8C65P+5q7sIR5rEVcAA8t6wFM9NMkcCXwzIhZGxCLgG8BRedtxwA8j4p6IeD0iJgKvkCqM3joA+FtEnBMR/4iIxRFxT41ynwRuioibIuKNiJgETAH2ryhzaUQ8WOvbcA2vkZqmNsnnvbObstV6c55a7sy/x+ukD8kd8vpdgPUj4psR8WpEPAr8CDi8m2N9SdJzwGLgPbz5HlXbH7g/Iq6JiNdIzX9/q9h+GDA+IuZGxLPAuK4NkjYkVQ4n5bvFhcD3e4gL4HLgCEkr5bLVnd9HApdExLSIeAX4MvBuSSMKxPvvwH9FxOz8d/wdYEffBbSHK4CB5WlgiLrvFN2E9I2sy1/zOkgfnKfkppjn8gfQsIrtvTGM9O2uJ5sBh1ad8z2kb45dKj8gXgLW6OZ4p5GaFP6Ym2GO6UXMvTlPkf1Xze/FZqQmo8rf8QxSH009Z0fEOsAI4GVg6zrlNgGe6HoRKXvjE/W2Vy1vBqwEzK+I64ekO6C6IuJx0h3Jd4CHIuKJqiJL/Y1FxAukv82hBeLd
DBhfEc8zpPdzaHcxWWv0+ukKa6u7gH+QbvOvqVNmHkt35g7P6yD9R/x2RHy7CbE8QWq6KFLu/yLiuAbOsUyq2oj4G+lOBqUnn34t6Y6IqNmE0qDepsh9AngsIrbs9YkiHpd0IjBR0g357qTSfFJlC4AkVb7O2zeteF257QnSHd6QBjryLyM1Fx5dY1vX31hXTKuT7k6fLBBv19/gFb2Mx1rAdwADSEQ8T+qY/YGkQyStJmklSftJ+l4udiXwFUnr5/bYr/HmLfyPgBMk/YuS1SV9OLfn99YNwEZKjwiuImlNSf9So9zlwIGS9s0dlqsqPWa5aY2y1RYBbwBbdK2QdGjFvs+SPqx7/ahnDxYA6yl1uhfxR+Dvkk5XGjuxoqS3S9qlyM65WWwecHyNzTcC20n6aL7b+AKwUcX2q4ETJQ2VtA5wesVx55P6S86RtJakFSS9VdIeBcK6itRfcHWNbT8Gjpa0o9IDBt8B7omIOQXivQD4ckUn89qSDi0Qj7WAK4ABJiLOBb5I6mRbRPpG9Tng57nIf5La2O8FZgLT8joiYgrp2/P5pA/Ph4FPNxjHYuCDwIGkppGHSE8RVZd7gvSI6hkV8Z5Kgb+9iHgJ+Dbw+9xksBupvf0eSS+QOmJPjIjHGvkdujnvA6SK9NF83m6byHKfwIGkPoXHgKeAi4CiFQjAWcBpqnhiKx/7KeBQUtv+08CWwO8rivyI9CF/L/Bn4CZSp29XpfgpUof3/aT3/BqWbn6r9zu9HBG/rnFHQkRMBr5K6uydD7yV3K/QU7wR8TPgu8BPJP0dmEXqp7A2kCeEMVt+SNoPuCAi3KlqPfIdgNkAlpuc9pc0KD9y+3W6f0zY7J98B2A2gElaDbgd2Ib0NNGNpGaxv7c1MBsQXAGYmXUoNwGZmXWofjEOYMiQITFixIh2h2FmNqBMnTr1qYhYv+eStfWLCmDEiBFMmTKl3WGYmQ0okv7ac6n63ARkZtaherwDkDSMNCx8I9KozAsjYnwevTcWGAnsmgcZkRNCzQb+kg9xd0Sc0PTIre0m/+at7Q7BbMDbe68iKbVao0gT0BLglIiYllMGTJU0iTSC76Ok5FLVHomI3mZaNDOzPtRjBZDziczPy4slzQaG5vwlpFxPZmY20PSqDyA37+wE1Mr7XmlzpcnFb5f03jrHOl5pXtspixYt6k0YZmbWBIUrAElrkJI/ndTDKMP5wPCI2ImUtOzHktaqLhQRF0bEzhGx8/rrN/wUk5mZNahQBZBnBroWuCIiruuubES8EhFP5+WppElDtiobqJmZNVeRp4AEXAzMzqmIeyq/PvBMRLwuaQtSOthHS0dq/U47n14ws/KKPAW0O2m+0pmSpud1ZwCrkCbkXh+4UdL0iNiXNJH3NyV15SQ/ISKeaX7oZmZWRpGngO4kzdlZyzJpZyPiWlJzkZmZ9WMeCWxm1qFcAZiZdagyqSAGkyaOHgHMAQ6LiGclrQtcQpon9B/AMRExqzXhWzuNHTu23SGY9Yr/ZpdW5A6gKxXESGA3YLSkbYExwOSI2BKYnF9D6iCeHhHbkyakHt/8sM3MrKweK4CImB8R0/LyYlKit6HAwcDEXGwicEhe3pZUIRARDwAjJG3Y5LjNzKykMqkgNsx5grryBW2Qi80gJYlD0q7AZsCmNY7lVBBmZm3UilQQ44B185iBzwN/JjUjLcWpIMzM2qvQjGB1UkEskLRxRMyXtDGwECBXDkfn/QQ8lv/ZcsYdamYDW493AN2kgrgeGJWXRwG/yOXXkbRyXv8Z4I4e7hjMzKwNyqSCGAdcLelY4HHg0LxtJHCZpNeB+4FjmxuymZk1Q9lUEHvXKH8XKQGcmZn1Yx4JbGbWoRquACQNk/RbSbMl3SfpxLx+R0l3S5qeH/PctXnhmplZsxR6CqiOepPFfw/4RkTcLGn//HrP8qFafzN3zO/aHYINUJuOqzlTrPWxhiuAepPFAwF0TQG5NjCvbJBmZtZ8Ze4A/qlq
hPBJwC2SziY1Mf1rM85hZmbNVboTuMYI4c8CJ0fEMOBk0hiCWvs5FYSZWRuVqgDqjBAeBXQt/xSo2QnsVBBmZu3VcBNQNyOE5wF7ALcBewEPlQnQ+i935JkNbGX6AOqNED4OGC9pEGlCmOPLhWhmZq1Q5img7kYIv6vR45qZWd/wSGAzsw7lCsDMrEO1rAKQdHJOETFL0pWSVm3VuczMrPeaMhCsmqShwBeAbSPiZUlXA4cDE1pxPmuPcz5+QLtDsBY75aob2h2CtVArm4AGAW/JTwOthlNCmJn1Ky2pACLiSeBs0kQx84HnI+LWyjIeCWxm1l4tqQAkrQscDGwObAKsLumTlWU8EtjMrL1a1QT0AeCxiFgUEa+RUkM4KZyZWT/Skk5gUtPPbpJWA14mTR05pUXnsjZxB6HZwNaqPoB7gGuAacDMfJ4LW3EuMzNrTKvuAIiIrwNfb9XxzcysHI8ENjPrUK4AzMw6VI8VgKRLJC2UNKti3Q6S7pI0U9IvJa2V168s6dK8foakPVsYu5mZlVCkD2ACcD5wWcW6i4AvRcTtko4BTgW+SpoLgIh4h6QNgJsl7RIRbzQ3bOsPfnDCb9odgrXA6Av2ancI1kd6vAOIiDuAZ6pWbw3ckZcnAR/Ly9sCk/N+C4HngJ2bEqmZmTVVo30As4CD8vKhwLC8PAM4WNIgSZuTJoYZVmN/p4IwM2uzRiuAY4DRkqYCawKv5vWXAHNJg77OA/4ALKl1AKeCMDNrr4bGAUTEA8A+AJK2Aj6c1y8BTu4qJ+kPeFJ4M7N+qaEKQNIGEbFQ0grAV4AL8vrVAEXEi5I+CCyJiPubF671J+4sNBvYeqwAJF0J7AkMkTSXNLp3DUmjc5HrgEvz8gbALZLeAJ4Ejmp6xGZm1hQ9VgARcUSdTeNrlJ1DekLIzMz6OY8ENjPrUK4AzMw6VJE+gGGkUcAbAW8AF0bEeEmDgauAEcAc4LCIeFbS2sDlwPB8/LMj4tJax7aBbfY2I9sdgvXCyAdmtzsE62eK3AEsAU6JiJHAbqTn/7cFxgCTI2JL0ujfMbn8aOD+iNiB1Hl8jqSVmx65mZmVUiQVxPyImJaXFwOzgaGkOX8n5mITgUO6dgHWlCRgDVIaiZqDwczMrH161QcgaQSwE3APsGFEzIdUSZAeAYWUOG4kMI80G9iJtZLBORWEmVl7Fa4AJK0BXAucFBF/76bovsB0YBNgR+D8rnTRlZwKwsysvQpVAJJWIn34XxER1+XVCyRtnLdvDCzM648GrovkYeAxYJvmhm1mZmUVeQpIwMXA7Ig4t2LT9cAoYFz++Yu8/nFgb+B3kjYkDQx7tJlBW//gp0rMBrYiuYB2J6V0mClpel53BumD/2pJx5I+9A/N274FTJA0ExBwekQ81dywzcysrCKpIO4kfZDXsneN8vPImULNzKz/8khgM7MO5QrAzKxDNTQfANRPEZG3fR74HGkA2I0RcVoTYrV+5h0T39HuEKyOmaNmtjsEGwAargB4M0XENElrAlMlTQI2JI0S3j4iXpG0QbdHMTOztmi4Asijf7tGAi+W1JUi4jhgXES8krctrH8UMzNrl6b0AVSliNgKeK+keyTdLmmXOvs4FYSZWRuVrgBqpIgYBKxLyhx6KmmswDKPkToVhJlZe5WqAOqkiJjLm6kg/kjqIB5SLkwzM2u2Mk8B1UsR8XNgL+A2SVsBKwMeCbwc8pMmZgNbmaeA6qWIuAS4RNIs4FVgVEREuTDNzKzZyjwF1F2KiE82elwzM+sbHglsZtahXAGYmXWoMn0ASDqRNPBLwI8i4jxJ3yKNBH6DNEnMp3OGUFvejF273RF0rrHPtzsCWw40fAcg6e2kD/9dgR2AAyRtCZwVEdtHxI7ADcDXmhKpmZk1VZkmoJHA3RHxUkQsAW4HPlI1X/DqgJ8AMjPrh8pUALOA90laT9JqwP7AMABJ35b0BHAkde4AnArCzKy9Gq4AImI28F1gEvArYAYp
QygRcWZEDAOuIKWFrrW/U0GYmbVRqaeAIuLiiHhnRLwPeAZ4qKrIj4GPlTmHmZm1RtmngDaIiIWShgMfBd4tacuI6KoIDgIeKBuk9VN+EsVsQCtVAQDXSloPeA0YHRHPSrpI0takx0D/CpxQNkgzM2u+UhVARLy3xjo3+ZiZDQAeCWxm1qFcAZiZdagem4AkDQMuAzYitetfGBHjJZ0FHEhK+fwIcHREPJenh5wN/CUf4u6IcD/AcmjEmBvbHUJHmjPuw+0OwZYTRe4AlgCnRMRI0jSPoyVtS3r+/+0RsT3wIPDlin0eiYgd8z9/+JuZ9UM9VgARMT8ipuXlxaRv90Mj4tacAgLgbmDT1oVpZmbN1qs+gNy8sxNwT9WmY4CbK15vLunPkm6XtMyTQvlYTgVhZtZGhSsASWuQJoA/qTLhm6QzSc1EV+RV84HhEbET8EXgx5LWqj6eU0GYmbVXoXEAklYiffhfERHXVawfBRwA7N01729EvAK8kpenSnoE2AqY0uTYrc3cGWk2sPV4ByBJwMXA7Ig4t2L9h4DTgYMi4qWK9etLWjEvbwFsCTza7MDNzKycIncAuwNHATMlTc/rzgD+G1gFmJTqiH8+7vk+4JuSlgCvAydExDNNj9zMzErpsQKIiDtJUz5Wu6lO+WtJzUVmZtaPeSSwmVmHKtIHMEzSbyXNlnRfngi+cvuXJIWkIfn1qZKm53+zJL0uaXCrfgEzM2tMkT6ArpHA0yStCUyVNCki7s9pIj4IPN5VOCLOAs4CkHQgcLL7AJZPTgXR9/zklTVTwyOB8+bvA6dRf+L3I4ArmxCnmZk1WcMjgSUdBDwZETPqlF0N+BDuEDYz65cKTwhTORKY1Cx0JrBPN7scCPy+XvOPpOOB4wGGDx9eNAwzM2uSQncANUYCvxXYHJghaQ4pEdw0SRtV7HY43TT/OBWEmVl7FZkPYJmRwBExE9igoswcYOeIeCq/XhvYA/hkC2K2fsIdkmYDW5E7gK6RwHtVPN65fw/7fAS4NSJeLB2hmZm1RJmRwJVlRlS9ngBMKBGXmZm1mEcCm5l1KFcAZmYdqvBjoLXkzt/FpKyfSyJiZ0k7ABcAawBzgCMrJ5AxM7P+oVQFkL2/6+mf7CLgSxFxu6RjgFOBrzbhPG230W+n91yog/zt/Tu2OwQzK6EVTUBbA3fk5UnAx1pwDjMzK6lsBRDArZKm5pG9ALOAg/LyocCwkucwM7MWKFsB7B4R7wT2A0ZLeh9wTF6eCqwJvFprR0nHS5oiacqiRYtKhmFmZr1VqgKIiHn550LgZ8CuEfFAROwTEe8ipYJ4pM6+TgVhZtZGDXcCS1odWCEiFuflfUhzAW8QEQslrQB8hfRE0HLBnZ5mtjwpcwewIXCnpBnAH4EbI+JXwBGSHgQeAOYBl5YP08zMmq3hO4CIeBTYocb68cD4MkGZmVnreSSwmVmHcgVgZtahik4Ic4mkhZJmVawbLGmSpIfyz3Xz+oMl3ZvTRk+R9J5WBW9mZo0r2gcwATgfuKxi3RhgckSMkzQmvz4dmAxcHxEhaXvgamCb5oXcdyb/5q3tDqFf23uvmk/4mtkAUegOICLuAKrn9j0YmJiXJwKH5LIvRETk9auTRgubmVk/U+ox0IiYD5B/Vk4R+RFJDwA3kkYGL8Mjgc3M2qslncAR8bOI2IZ0V/CtOmU8EtjMrI3KVAALJG0MkH8urC6Qm47eKmlIifOYmVkLlJkP4HpgFDAu//wFgKS3AY/kTuB3AisDT5cNtB3cyWlmy7NCFYCkK4E9gSGS5gJfJ33wXy3pWOBxUupnSPn/PyXpNeBl4OMVncJmZtZPFKoAIuKIOpv2rlH2u8B3ywRlZmat55HAZmYdyhWAmVmH6rECqJMGYkdJd1eke9g1rz8yp4G4V9IfJC2TLdTMzPqHIn0AE1g2DcT3gG9ExM2S9s+v9wQeA/aIiGcl7QdcCPxLUyNuobFjx7Y7hAHF18tsYOuxAoiI
OySNqF4NrJWX1yZN/EJE/KGizN3ApuVDNDOzVmh0HMBJwC2SziY1I/1rjTLHAjfXO4Ck44HjAYYPH95gGGZm1qhGO4E/C5wcEcOAk4GLKzdKej+pAji93gGcCsLMrL0arQBGAdfl5Z8Cu3ZtyCmgLwIOjogBOQLYzKwTNNoENA/YA7gN2At4CEDScFLFcFREPNiMAPuSOzXNrJP0WAHUSQNxHDBe0iDgH+S2fOBrwHrA/0oCWBIRO7cgbjMzK6nIU0D10kC8q0bZzwCfKRuUmZm1nkcCm5l1KFcAZmYdqkgfwDDSKOCNgDeACyNivKTBwFXACGAOcFhEPFux3y6kwWAfj4hrmh96Y+aO+V27Q1hubDruve0OwcxKKHIHsAQ4JSJGArsBoyVtC4wBJkfElsDk/BoASSuSUkLf0vyQzcysGXqsACJifkRMy8uLgdnAUOBgYGIuNpE0/2+XzwPXUmOaSDMz6x961QeQcwLtBNwDbBgR8yFVEsAGucxQ4CPABT0c6/icSXTKokWLeh+5mZmVUrgCkLQG6Vv9SRHx926KngecHhGvd3c8p4IwM2uvonMCr0T68L8iIrpSQCyQtHFEzJe0MW829+wM/CQPBBsC7C9pSUT8vMmxm5lZCUWeAhIp2dvsiDi3YtP1pJxA4/LPXwBExOYV+04AbuhPH/5+csXMLClyB7A7cBQwU9L0vO4M0gf/1ZKOBR4HDm1NiGZm1gpFUkHcCajO5r172PfTDcRkZmZ9wCOBzcw6lCsAM7MOVaQT+BLgAGBhRLw9r7sK2DoXWQd4LiJ2zNu2B35ImjP4DWCXiPhHC2LvtXM+fkC7Q1iunHLVDe0OwcxKKNIJPAE4n5QPCICI+HjXsqRzgOfz8iDgctKEMDMkrQe81syAzcysOYp0At+RRwAvIz8iehhpVjCAfYB7I2JG3tdTQpqZ9VNl+wDeCyyIiIfy662AkHSLpGmSTqu3o1NBmJm1V9kK4AjgyorXg4D3AEfmnx+RVPNRUaeCMDNrr4YrgNze/1HSnABd5gK3R8RTEfEScBPwznIhmplZKxTKBVTHB4AHImJuxbpbgNMkrQa8CuwBfL/EOZrKT62Ymb2pxzsASVcCdwFbS5qbUz8AHM7SzT/kGcHOBf4ETAemRcSNzQ3ZzMyaochTQEfUWf/pOusvJz0KamZm/ZhHApuZdShXAGZmHapMJzCStmbpp4C2AL5GGjV8FTACmAMclvsH+tQPTvhNX5+yo4y+YK+eC5lZv1XqDiAi/hIRO+Y8QO8CXgJ+BowBJkfElsDk/NrMzPqRZjYB7Q08EhF/BQ4GJub1E4FDmngeMzNrgmZWAJWPhW4YEfMB8s8Nqgs7FYSZWXs1pQKQtDJwEPDTovs4FYSZWXs16w5gP9KgrwX59QJJGwPknwubdB4zM2uSUk8BVahOCnc9MIo0cfwo4BdNOk+v+CkVM7P6St8B5Lw/HwSuq1g9DvigpIfytnFlz2NmZs1V+g4gZ/1cr2rd06SngszMrJ/ySGAzsw7lCsDMrEP12AQkaRgptcNGwBvAhRExXtJgaqR7kLQNcClpIpgzI+LsFsVe1+xtRvb1KTvSyAdmtzsEMyuhyB3AEuCUiBgJ7AaMlrQt9dM9PAN8AejzD34zMyuuxwogIuZHxLS8vBiYDQylTrqHiFgYEX8CXmtJxGZm1hS96gOQNALYCbiHAukeejiWU0GYmbVR4QpA0hrAtcBJEfH3sid2Kggzs/YqNA5A0kqkD/8rIqJrwNcCSRtHxPz+lu7BnZNmZj0rMim8gIuB2RFxbsWmrnQP0MZ0D2Zm1pgidwC7A0cBMyVNz+vOIKV3uFrSscDjwKEAkjYCpgBrAW9IOgnYthnNRmZm1jw9VgARcSegOpuXSfcQEX8DNi0Zl5mZtZhHApuZdagifQDDJP1W0mxJ90k6Ma8/NL9+Q9LOVftsL+muvH2mpFVb9QuYmVljivQBdI0EniZpTWCqpEnALOCjwA8r
C0saBFwOHBURMyStRx8NCnvHxHf0xWksmzlqZrtDMLMSivQBzAe6BnwtljQbGBoRkwDSQ0JL2Qe4NyJm5H2ebmrEZmbWFGVGAtezFRCSbpE0TdJpjYdnZmatUnhCmF6MBB4EvAfYBXgJmCxpakRMrjre8cDxAMOHD+9t3GZmVlKhO4A6I4HrmQvcHhFP5dnCbiKlhl6KU0GYmbVXkfkA6o0ErucW4LQ8V/CrwB7A90tFWZA7Jc3MiiszEngV4H+A9YEbJU2PiH3zpDDnAn8CArgpIm5sQexmZlZC2ZHAP6uzz+WkR0HNzKyf8khgM7MO5QrAzKxDlUkFMVjSJEkP5Z/r5vWnSpqe/82S9HqeQN7MzPqRMqkgPk2aFH6cpDGkSeFPj4izgLMAJB0InBwRz7Qm/Cpj1+6T01g29vl2R2BmJTR9UvgqRwBXNidUMzNrppZNCp/HAXyINICs1rE8KbyZWRu1clL4A4Hf12v+8UhgM7P2KpMKYkGeDJ46k8Ifjpt/zMz6rTKpILomhR9H1aTwktYmpYD4ZFOj7Yk7Jc3MCmv6pPDZR4BbI+LFZgZrZmbN0/RJ4fM+E4AJDUdlZmYt55HAZmYdyhWAmVmHKpIK4hJJCyXNqli3g6S7JM2U9EtJa+X1K0mamNfPlvTlVgZvZmaNK9IJPAE4H7isYt1FwJci4nZJxwCnAl8ldQSvEhHvyAPB7pd0ZUTMaW7YSxsxxtMNtMOccR9udwhmVkKRVBB3ANWDubYG7sjLk4CPdRUHVpc0CHgLaUawIoPGzMysjzXaBzALOCgvHwoMy8vXAC8C80mPhp5dbySwU0GYmbVXoxXAMcBoSVOBNUnf9AF2BV4HNgE2B06RtEWtAzgVhJlZexXpA1hGRDwA7AMgaSugqzH4E8CvIuI1YKGk3wM7A482IVYzM2uihioASRtExEJJKwBfAS7Imx4H9pJ0ObAasBtwXlMi7YY7I83Meq/IY6BXAncBW0uam1M/HCHpQeABYB5waS7+A2ANUh/Bn4BLI+LelkRuZmalFEkFcUSdTeNrlH2BpXMCmZlZP6WIaHcMSFoE/LXdcVQYAjzV7iCqOKZi+mNM0D/jckzF9MeYIMW1ekQ0/BRNv6gA+htJUyJi53bjDMHuAAAFLklEQVTHUckxFdMfY4L+GZdjKqY/xgTNicu5gMzMOpQrADOzDuUKoLYL2x1ADY6pmP4YE/TPuBxTMf0xJmhCXO4DMDPrUL4DMDPrUK4AzMw6VMdWAJIGS5ok6aH8c9065X4l6TlJN1St31zSPXn/qySt3IcxjcplHpI0qmL9bZL+Iml6/rdBiVg+lI/1sKQxNbavkn/vh/N1GFGx7ct5/V8k7dtoDM2KSdIISS9XXJcLqvdtYUzvkzRN0hJJ/1a1reb72OaYXq+4Ttc3K6aCcX1R0v2S7pU0WdJmFdvada26i6kl16pATCcoTbo1XdKdkrat2Na7/3sR0ZH/gO8BY/LyGOC7dcrtDRwI3FC1/mrg8Lx8AfDZvogJGExKrjcYWDcvr5u33Qbs3IQ4VgQeAbYAVgZmANtWlfkP4IK8fDhwVV7eNpdfhZQR9hFgxTbHNAKY1YK/oSIxjQC2J02o9G9F3sd2xZS3vdDs69SLuN4PrJaXP1vx/rXzWtWMqVXXqmBMa1UsH0RKwNnQ/72OvQMADgYm5uWJwCG1CkXEZGBx5TpJAvYizX/Q7f4tiGlfYFJEPBMRz5Im5PlQE85daVfg4Yh4NCJeBX6SY6sX6zXA3vm6HAz8JCJeiYjHgIfz8doZU6v0GFNEzImUD+uNqn1b9T6WiamVisT124h4Kb+8G9g0L7fzWtWLqVWKxFQ5ydbqpIm4oIH/e51cAWwYEfMB8s/eNJesBzwXEUvy67nA0D6KaSjwRMXr6nNfmm8Nv1riw6+ncyxVJl+H50nXpci+fR0TwOaS/izpdknvbUI8RWNqxb6tPO6qShM13S2pGV9qGo3rWODmBvfti5ig
NdeqUEySRkt6hNRq8IXe7FupoXTQA4WkXwMb1dh0ZtlD11hX6HnaJsTU3bmPjIgnJa0JXAscxdJzORdV5PerV6bha9ODMjHNB4ZHxNOS3gX8XNJ2Vd+kWhVTK/Zt5XGHR8Q8pYmcfiNpZkQ80pdxSfokaR6RPXq7bx/GBK25VoViiogfAD+Q9AlSSv5RRfettFxXABHxgXrbJC2QtHFEzJe0MbCwF4d+ClhH0qD8TXNTUlrsvohpLrBnxetNSW3/RMST+ediST8m3f41UgHM5c1pPrvOUf37dZWZqzQH9NqkuaOL7NuIhmOK1ED6CkBETM3fnLYCpvRBTN3tu2fVvreVjKdsTETEvPzzUUm3ATuR2pL7JC5JHyB9GdojIl6p2HfPqn1va3NMrbpWvX3/fgL8vwb37ehO4LNYusP1e92U3ZNlO4F/ytKdwP/RFzGROsIeI3WGrZuXB5Mq8yG5zEqkNvATGoxjEKmjbXPe7IjarqrMaJbucL06L2/H0h1Rj9KcTuAyMa3fFQOpc+1JYHBfxFRRdgLLdgIv8z62OaZ1gVXy8hDgIao6IFv8/nV9gG5Z5G++zTG15FoVjGnLiuUDgSl5udf/90q/sQP1H6lteHJ+4yZ3/UGRbvMuqij3O2AR8DKpht03r98C+COpo+WnXX8MfRTTMfm8DwNH53WrA1OBe4H7SPM1NPzBC+wPPJj/+M/M674JHJSXV82/98P5OmxRse+Zeb+/APs18T1rKCbgY/mazACmAQf2YUy75L+bF4Gngfu6ex/bGRPwr8DMfJ1mAsc2+f9cT3H9GlgATM//ru8H16pmTK28VgViGp//nqcDv6Wigujt/z2ngjAz61Cd/BSQmVlHcwVgZtahXAGYmXUoVwBmZh3KFYCZWYdyBWBm1qFcAZiZdaj/D6qWjWlsDPokAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Ridge regression: the regularisation strength is chosen by RidgeCV from the\n",
    "# candidate grid, and the cross-validation values are stored for inspection\n",
    "ridge = RidgeCV(alphas = [0.01, 0.1, 1, 10, 100, 1000], store_cv_values = True)\n",
    "ridge.fit(X_train, y_train)\n",
    "\n",
    "# Best hyper-parameter alpha found by cross-validation\n",
    "alpha = ridge.alpha_\n",
    "print(\"Best alpha :\", alpha)\n",
    "\n",
    "# Cross-validated estimate of the test error: average the stored CV values\n",
    "# over axis 0, take the square root and report the smallest value\n",
    "mse_cv = ridge.cv_values_.mean(axis = 0)\n",
    "rmse_cv = np.sqrt(mse_cv)\n",
    "print(\"cv of rmse :\", rmse_cv.min())\n",
    "\n",
    "# Training error (this step is not needed in a real task)\n",
    "y_train_rdg = ridge.predict(X_train)\n",
    "rmse_train = np.sqrt(mean_squared_error(y_train, y_train_rdg))\n",
    "print(\"RMSE on Training set :\", rmse_train)\n",
    "\n",
    "# Count the coefficients that are non-zero and those that are exactly zero\n",
    "coefs = pd.Series(ridge.coef_, index = X_train.columns)\n",
    "n_kept = (coefs != 0).sum()\n",
    "n_zero = (coefs == 0).sum()\n",
    "print(\"Ridge picked \" + str(n_kept) + \" features and eliminated the other \" + str(n_zero) + \" features\")\n",
    "\n",
    "# The 10 most negative and the 10 most positive coefficients\n",
    "coefs_sorted = coefs.sort_values()\n",
    "imp_coefs = pd.concat([coefs_sorted.head(10), coefs_sorted.tail(10)])\n",
    "ax = imp_coefs.plot(kind = \"barh\")\n",
    "ax.set_title(\"Coefficients in the Ridge Model\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## L1 正则化（Lasso）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best alpha : 0.001\n",
      "Try again for more precision with alphas centered around 0.001\n",
      "Best alpha : 0.0009000000000000001\n",
      "cv of rmse : 0.2768184184747502\n",
      "RMSE on Training set : 0.28012660641978054\n",
      "Lasso picked 116 features and eliminated the other 135 features\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEICAYAAABRSj9aAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XmcXFWd/vHPA4iILLKEPSGoCDiA6ETGGVE2F0RWB0YQNSzq4KACopABB1DUX9QRZQZmkEEkDIigwBhZhBgVRAENMSFikB2JRBJANhEh4fn9cU9L0VZ3V9earjzv16tfXXXuufd+b3dy+ta553yPbBMREf1rhV4HEBERnZWGPiKiz6Whj4joc2noIyL6XBr6iIg+l4Y+IqLPpaGPv5C0haRfSnpC0sckvUTS9yQ9Junbkg6SdE0Dxzle0tndiHmYGCZIelLSim063smSzm/HsZY3ku6V9JYG6k2UZEkrdSOu5Uka+jFI0nskzSoN2UJJV0naoQ2HPhb4se3Vbf8HsB+wPrCO7f1tX2D7bSMdxPbnbX+g1WBa+Y9v+7e2V7O9tInz7iRpwWj3G8Xxz5X02U4dv1klLkvaa1D5V0v5wT0KLVqUhn6MkfRx4KvA56ka4QnAfwF7t+HwmwK3Dnp/u+0lbTh2jA23A5MH3pQ/svsDd/UsomhZGvoxRNKawGeAI2xfavuPtp+1/T3bnyx1XlzuwB4oX1+V9OKaY+whaY6kRyX9TNK2pfyHwM7A6eWTwoXAicC7y/vDJB0s6fqaY/2NpBmSHpH0oKTjS/kLujkkvaGc61FJcyXtVLPtx5JOkfTT0mV0jaR1y+bryvdHSwx/L+mVkq4t3UkPSbpoiJ/VCz4NjHCe2v1eClwFbFTO+aSkjcrmlSWdV/a/VdKkmv02knSJpMWS7pH0sUZ+p3XOf5qk+yU9LulmSW+q2bZ9+ST3ePl5n1rKV5F0vqSHy8/4F5LWr4lrevkd3SnpgyOE8D3gjZLWKu93A24Bfl8TxwqSPiXpPkmLys9kzZrt7yvbHpZ0wqDrW0HSFEl3le0XS1q7mZ9VNC4N/djy98AqwGXD1DkBeAOwHfAaYHvgUwCSXgecA/wzsA7wNWC6pBfb3gX4CfCR0uVxINWnhovK+6/XnkTS6sAPgO8DGwGvBGYODkbSxsAVwGeBtYFPAJdIGldT7T3AIcB6wMqlDsCby/eXlRhuAE4BrgHWAjYB/nOYn8VgQ53nL2z/EXgH8EA552q2Hyib9wK+BbwMmA6cXq5xBaoGci6wMbArcJSkt48itgG/oPrdrQ18E/i2pFXKttOA02yvAbwCuLiUTwbWBMZT/V4PB/5Utl0ILKD6He0HfF7SrsOc/+lybQeU9+8HzhtU5+DytTPwcmA1nv9ZvBr4b+B95ZzrUP2eBnwM2AfYsWz/A3DGMPFEG6ShH1vWAR4aoSvlIOAzthfZXgx8muo/HcAHga/Zvsn2UtvTgD9T/WEYrT2A39v+su2nbT9h+6Y69d4LXGn7StvP2Z4BzAJ2r6nzDdu32/4TVeO13TDnfZaqS2mjct7rh6k72GjOU8/15TqWAv9L9YcU4PXAONufsf2M7buB/+H5xrJhts+3/bDtJba/DLwY2KJsfhZ4paR1bT9p+8aa8nWAV5bf6822H5c0HtgBOK78rOYAZ/P8v4ehnAe8v9yl7wj836DtBwGn2r7b9pPAvwIHlE9P+wGX277O9p+BfwOeq9n3n4ETbC8o208G9lMewHZUGvqx5WFg3RH+U2wE3Ffz/r5SBlUDeUz5eP+opEep7gI3YvTG01i/7abA/oPOuQOwYU2d39e8forqDnEoxwICfl66Tw4dRcyjOU8j+69SfhebUnX11F7j8VTPUEZF0jGS5peuqUep7tQHupgOA14F3Fa6Z/Yo5f8LXA18S1V33RclvYjq9/qI7SdqTnEf1aeOIZU/nuOoPgle
Xv4w1qr3b2ylcr0bAffXHOuPVP9uB2wKXFbzc5oPLKWJn1U0Ln9Fx5YbqD5a7wN8Z4g6D/DCh6oTShlU/wE/Z/tzbYjlfuDABuv9r+2R+obr+avUqrZ/T/XJBFUjjX4g6TrbdzZx/IbPO4L7gXtsb97KSUt//HFUXT+32n5O0h+o/rBh+w7gwNJV9C7gO5LWKY3pp4FPS5oIXAn8hqqLa21Jq9c09hOA3zUQzvlUz2h2rrNt4N/YgAnAEuBBYCGwVc01rUr1aWPA/cChtn9a5/onNhBXNCF39GOI7ceo/vOdIWkfSatKepGkd0j6Yql2IfApSePKw8YTqf7TQtWdcLikv1PlpZLeWfrbR+tyYANJR6l6ALy6pL+rU+98YE9Jb5e0YnlwuJOkTerUHWwx1cf+lw8USNq/Zt8/UDXKox5COYIHgXVqHzCO4OfA45KOUzX3YEVJW0t6/TD7DPwsBr5WBlanajAXAytJOhFYY2AHSe+VNM72c8CjpXippJ0lbaNqzsDjVF05S23fD/wM+H/lHNtSfSq4oIFr+g/grTz/QLzWhcDRkjaTtBrPP8tZQnUDsoekHco1fYYXtjNnAp+TtGm5pnGS2jFiLIaRhn6MsX0q8HGqj9WLqe6QPsLz/aifpeoDvwWYB8wuZdieRXU3fDpVI3kn1UO1ZuJ4gqoh2JOqS+MO6tz9lcZmb6qujIF4P0kD//ZsPwV8Dvhp+aj/Bqr+8JskPUn10PBI2/c0cw3DnPc2qsbs7nLeYbu2Sp/9nlR9/vcAD1H1hQ/3h2IK1QPTga8fUnW/XEU1xPE+qk9v99fssxtwa7n204ADbD8NbEDVwD5O1RVyLc//cT8QmEh1F34ZcFJ5TjLSz+AR2zNdf8GKc6i6i64r1/s08NGy363AEVQPkhdS/TurnZNwGtXv7RpJTwA3AvVuEKKNlIVHIiL6W+7oIyL6XBr6iIg+l4Y+IqLPpaGPiOhzy8Q4+nXXXdcTJ07sdRgREWPKzTff/JDtcSPVWyYa+okTJzJr1qxehxERMaZIum/kWum6iYjoeyPe0ZfESOdRTcp4DjjL9mmS9qdKSLQVsH2ZjDMwjXk+1RRsgBttH972yJdTM3/4il6HEBFttOsunU/130jXzRLgGNuzy1T5myXNAH5FlW/ja3X2ucv2aDMDRkREB4zY0NteSDWVGdtPSJoPbDwwjVpSZyOMiIiWjKqPvnTLvBaol3e81maqFpm+VjUr5Aw61odUrZYza/HixaMJIyIiRqHhhr5kqbsEOMr248NUXQhMsP1aquRb35S0xuBKts+yPcn2pHHjRhwdFBERTWqooS+LGFwCXGD70uHq2v6z7YfL65upFqd4VauBRkREcxoZdSPg68D8kiJ3pPrjqFa1WSrp5cDmwN0tRxpAd57QR0R/aWTUzRup1picJ2lOKTueai3L/6RacuwKSXNsv51qQefPSFpCtSDE4bYfaX/oERHRiEZG3VxPWcqsjsvq1L+EqpsnIiKWAZkZGxHR59LQR0T0uaaTmg2TGmE7qgWAV6GaVfsvtn/ejmADTj755F6HENET+bffvFayVw6VGuGLwKdtXyVp9/J+p9ZDjYiIZjTd0A+VGgEwMDBBak2q1ecjIqJH2pKPflBqhKOAqyX9O9UzgH8YYp8PAR8CmDBhQjvCiIiIOlp+GFsnNcKHgaNtjweOppps9VeSAiEiojtku/mdq9QIlwNXD8yalfQY8DLbLrNqH7P9V7luak2aNMlZYSoiYnQk3Wx70kj1mr6jHyY1wgPAjuX1LsAdzZ4jIiJa10of/VCpET4InCZpJeBpSj98RET0RiujboZLjfC3zR43IiLaKzNjIyL63IgNvaTxkn4kab6kWyUdWcrXljRD0h3l+1qlfC1Jl0m6RdLPJW3d6YuIiIihtbI4+MHATNtTJU0BpgDHUfXTz7G9r6QtgTOAXTsT/vJnwZSf9DqEiK7YZGrdVUijCSPe0dteaHt2ef0EMDAD
dm9gWqk2DdinvH41MLPUvw2YKGn9NscdERENamVx8PVLGoSBdAjrlWpzgXeV+tsDmwKbtCfciIgYrU4sDj4VWKsMufwo8Euq7p/Bx/uQpFmSZi1evHiUYUdERKMaGl45xOLgD0ra0PZCSRsCiwDKH4FDyn4C7ilfL2D7LOAsqGbGtnohERFRXyuLg08HJlPdwU8Gvlvqvwx4yvYzwAeA60b4BBCjkAdUETFarSwOPhW4WNJhwG+B/cu2rYDzJC0Ffg0c1t6QIyJiNFpdHPyvhk3avgHYvMW4IiKiTTIzNiKiz6Whj4joc42kQDhH0iJJv6ope42kGyTNk/Q9SWuU8pUlfaOUz5W0Uwdjj4iIBjTyMPZc4HTgvJqys4FP2L5W0qHAJ4F/o0pRjO1tJK0HXCXp9bafa2/Yy68vv3uPXocQy4BjLrq81yHEGNJICoTrgEcGFW8BXFdezwD+sbyuTX+wCHgUGHH1k4iI6Jxm++h/BexVXu8PjC+v5wJ7S1pJ0mZUeenH19k/M2MjIrqk2Yb+UOAISTcDqwPPlPJzgAXALOCrwM+ok/4Asjh4RES3NLXCVMlK+TYASa8C3lnKlwBHD9ST9DOyZmxERE811dBLWs/2IkkrAJ8CzizlqwKy/UdJbwWW2P51+8KNPISLiNFqJNfNhcBOwLqSFgAnAatJOqJUuRT4Rnm9HnC1pOeA31GlToiIiB5qJAXCgUNsOq1O3XupRuRERMQyIjNjIyL6XBr6iIg+10gKhPGSfiRpvqRbJR1ZyteWNEPSHeX7WqV8zZIWYW6pf0inLyIiIobWyKibJcAxtmdLWh24WdIM4GBgpu2pkqYAU4DjgCOAX9veU9I44DeSLigLkUSLzjj8h70OIXrsiDN36XUIMcY0kgJhoe3Z5fUTwHxgY2BvYFqpNg3YZ2AXYPWyMtVqVOkT6k6aioiIzhtVH72kicBrgZuA9W0vhOqPAdXQSqgSoG0FPADMA46sl9QsKRAiIrqj4YZe0mpUC4QfNcIasG8H5gAbAdsBpw+kMa6VFAgREd3RUEMv6UVUjfwFti8txQ9K2rBs3xBYVMoPAS515U7gHmDL9oYdERGNamRmrICvA/Ntn1qzaTowmWqR8MnAd0v5b6nWkv2JpPWpJlDd3c6gl2d5EBcRo9XIqJs3UqUymCdpTik7nqqBv1jSYVSN+/5l2ynAuZLmUS0qfpzth9obdkRENKqRFAjXUzXY9exap/4DlMyWERHRe5kZGxHR59LQR0T0uUYexo6nWhh8A+A54Czbp9Vs/wTwJWCc7YckfRI4qOb4W5Vtg9edjSbM33KrXocQbbbVbfN7HUL0uUbu6AdSIGwFvIFqCcFXw1/+CLyV6mEsALa/ZHs729sB/wpcm0Y+IqJ3WkmBAPAV4FiqtAf1HAhc2IY4IyKiSU2nQJC0F/A723OHqLsqsBvVRKt625MCISKiC5pKgUDVnXMCcOIwu+wJ/HSobpukQIiI6I5mUyC8AtgMmCvpXmATYLakDWp2O4B020RE9FxTKRBsz+P5bJWUxn7SwAxYSWsCOwLv7UDMy7WM0IiI0Wrkjn4gBcIukuaUr91H2Gdf4Brbf2w5woiIaEmrKRAG6kwc9P5c4NwW4oqIiDbJzNiIiD6Xhj4ios81nQJB0peohlA+A9wFHGL70TLWfj7wm3KIG20f3oHYl0vbTNum1yEst+ZNntfrECKa0koKhBnA1ra3BW6nSncw4K6BNAhp5CMieqvpFAi2r7G9pFS7kWosfURELGOaToEwaNOhwFU17zeT9EtJ10p60xDHSgqEiIguaCoFgu3Ha8pPoOreuaAULQQm2H4t8HHgm5LWGHy8pECIiOiOZlMgDJRPBvYADrJtANt/tv1weX0z1YPaV7U78IiIaExTKRBK+W7AccCOtp+qKR8HPGJ7qaSXA5sDd7c98uVURn5ExGiN2NDzfAqEeZLmlLLjgf8AXgzMqP4W/GUY5ZuBz0haAiwFDs/C
IxERvdNKCoQrh6h/CUPkoI+IiO7LzNiIiD6Xhj4ios810kffFElHAx+gWk92HlWKhKc7db7lxslr9jqC5c/Jj/U6goiWdOSOXtLGwMeoFiPZGliRasWpiIjosk523awEvETSSsCqwAMdPFdERAyhIw297d8B/w78lmqm7GO2r6mtkxQIERHd0amum7WAvakWEN8IeKmkF6wfmxQIERHd0amum7cA99hebPtZ4FLgHzp0roiIGEanRt38FniDpFWBPwG7ArM6dK7lS0aARMQodaqP/ibgO8BsqqGVKwBndeJcERExvI6No7d9EnBSp44fERGNyczYiIg+l4Y+IqLPtdR1I+lI4INU2S3/x/ZXJZ1CNbTyOWARcLDtTJZqk4lTruh1CH3t3qnv7HUIEW3X9B29pK2pGvntgdcAe0jaHPiS7W1tbwdcDpzYlkgjIqIprXTdbEW12MhTtpcA1wL71q4nC7yUKqlZRET0SCsN/a+AN0tap4yX3x0YDyDpc5LuBw5iiDv6pECIiOiOpht62/OBLwAzgO8Dc4ElZdsJtscDFwAfGWL/pECIiOgC2e3pWZH0eWCB7f+qKdsUuKKkKh7SpEmTPGtWJs5GRIyGpJttTxqpXkvDKyWtV75PAN4FXFgeyA7YC7itlXNERERrWp0Ze4mkdYBngSNs/0HS2ZK2oBpeeR9weKtBRkRE81pq6G2/qU7ZP7ZyzIiIaK/MjI2I6HMjNvSSzpG0SNKvaspeI+kGSfMkfU/SGjXbti3bbi3bV+lU8BERMbJGum7OBU4HzqspOxv4hO1rJR0KfBL4t7I+7PnA+2zPrem/jzZJCoT2SbqDWF6MeEdv+zrgkUHFWwDXldczgIF++bcBt9ieW/Z92PbSNsUaERFNaLaP/ldUQycB9qfMiAVeBVjS1ZJmSzq21QAjIqI1zTb0hwJHSLoZWB14ppSvBOxAlfpgB2BfSbvWO0BSIEREdEdTDb3t22y/zfbfAhcCd5VNC4BrbT9k+yngSuB1QxwjKRAiIrqgqXH0ktazvUjSCsCngDPLpquBY0uSs2eAHYGvtCXSAPIAMSJGr5HhlRcCNwBbSFog6TDgQEm3U6U3eAD4BoDtPwCnAr8A5gCzbWeYSERED414R2/7wCE2nTZE/fOphlhGRMQyIDNjIyL6XBr6iIg+11BDP0QahLUlzZB0R/m+VinfW9ItkuaU4ZM7dCr4iIgYWaOjbs7lr9MgTAFm2p4qaUp5fxwwE5hu25K2BS4GtmxfyN2xwY/m9DqEun6/83a9DiEixpiG7uiHSIOwNzCtvJ4G7FPqPunnl63K4uARET3WSh/9+rYXApTv6w1skLSvpNuAK6hm0UZERI905GGs7ctsb0l1l39KvTpJgRAR0R2tNPQPStoQoHxfNLhC6fJ5haR162xLCoSIiC5oZSnB6cBkYGr5/l0ASa8E7ioPY18HrAw83Gqg3ZaHnhHRLxpq6EsahJ2AdSUtAE6iauAvLikRfkuVrhiq3PTvl/Qs8Cfg3TUPZyMiossaauiHSYPwVymIbX8B+EIrQUVERPtkZmxERJ9LQx8R0ecaSVM8XtKPJM2XdKukIwdt/4QkD4yskbSWpMtKGoSfS9q6U8FHRMTIGumjXwIcY3u2pNWBmyXNsP1rSeOBt1I9jB1wPDDH9r6StgTOoE5f/rJu5g9f0esQ6tp1l7tGrhQRUWPEO3rbC23PLq+fAOYDG5fNXwGO5YVpDl5Nle8G27cBEyWt386gIyKicaPqo5c0EXgtcJOkvYDf2Z47qNpc4F2l/vbApsAmdY6VmbEREV3QcEMvaTXgEuAoqu6cE4AT61SdCqwlaQ7wUeCXpf4LZGZsRER3NDph6kVUjfwFti+VtA2wGTBXElR37LMlbW/798AhZT8B95SviIjogREb+tJYfx2Yb/tUANvzeGG2ynuBSbYfkvQy4CnbzwAfAK6z/Xgngu+kPPSMiH7RSNfNG4H3AbuUVaPmSNp9mPpb
AbeWNMXvAI4cpm5ERHTYiHf0tq8HNEKdiTWvbwA2bzmyiIhoi8yMjYjoc2noIyL6XNMpECStLWmGpDvK97UG7fd6SUsl7dep4CMiYmRNp0AADgZm2p4qaQowBTgOQNKKVKmKr+5M2J138skn9zqEupbVuCJi2dVKCoS9gWml2jSq9WEHfJRq3P1fLS8YERHd1XQKBGB92wuh+mNAGVcvaWNgX+DMEY6VFAgREV3QVAqEESZAfRU4zvbS4Y6XFAgREd3RVAqEUvygpA1tL5S0Ic9300wCvlVSI6wL7C5pie3/a3PsERHRgKZSIBTTgclUScwmA98FsL1Zzb7nApePxUY+Dz0jol80ckc/kAJhXslICdXiIlOBiyUdRrXwyP6dCTEiIlrRagqEYVeOsn1wEzFFREQbZWZsRESfS0MfEdHnGhp1MxRJqwDXAS8ux/qO7ZMkfZ1q9I2A24GDbT/ZarDdsmDKT3odwpA2mfqmXocQEWNMq3f0fwZ2sf0aYDtgN0lvAI62/Rrb21I9qP1Ii+eJiIgmtXRHb9vAwJ36i8qXByZUlaGZLwHcynkiIqJ5LffRS1qxDLtcBMywfVMp/wbwe2BL4D/r7JcUCBERXdByQ297qe3tqBYI317S1qX8EGAjqiRo766zX1IgRER0QdtG3dh+FPgxsFtN2VLgIuAf23WeiIgYnVZH3YwDnrX9qKSXAG8BvijplbbvLH30ewK3tSHWrsnIlojoJy019MCGwLSy0MgKwMXAFcBPJK1BNbxyLvDhFs8TERFNanXUzS1U+ekHe2Mrx42IiPbJzNiIiD6Xhj4ios81ko9+PHAesAHwHHCW7dMkrU01omYicC/wT7b/IOmTwEE1x98KGGf7kfaH3z5ffvcevQ6hIcdcdHmvQ4iIMaaRO/olwDG2twLeABwh6dXAFGCm7c2BmeU9tr9ke7sytv5fgWuX9UY+IqKfjdjQ215oe3Z5/QTVBKiNgb2BaaXaNGCfOrsfCFzYnlAjIqIZo+qjlzSRapTNTcD6thdC9ccAWG9Q3VWpJk9dMsSxkgIhIqILGm7oJa1G1WgfNZC0bAR7Aj8dqtsmKRAiIrqjoYZe0ouoGvkLbF9aih+UtGHZviFVUrNaB5Bum4iInmtk1I2ArwPzbZ9as2k6MJlqkfDJwHdr9lkT2BF4b1uj7aCMZomIftXIzNg3Au8D5pV0xADHUzXwF0s6jGpxkf1r9tkXuMb2H9sZbEREjN6IDb3t66ly1tSz6xD7nAuc23RUERHRNpkZGxHR59LQR0T0uVZSIJxCNWnqOaoRNwfbfqBmv9cDNwLvtv2dTgTfrDMO/2GvQ2jaEWfu0usQImKMaSUFwpdsb1tSHVwOnDiwQ8lP/wXg6g7EHBERo9B0CoRBk6ZeCrjm/Uepxt0PHlsfERFdNqqFRwalQEDS54D3A48BO5eyjamGV+4CvH6YY30I+BDAhAkTRh14REQ0pqUUCLZPsD0euAD4SKn6VeC4sjD4kJICISKiOxq6ox8iBUKtb1KtFXsSMAn4VjWhlnWB3SUtsf1/7Qk5IiJGo+kUCJI2t31HebsXcBuA7c1q6pwLXL6sNfIZuRIRy5NWUiAcJmkLquGV9wGHdybEiIhoRSspEK5sYN+Dm4gpIiLaKDNjIyL6XBr6iIg+18jD2HOAPYBFtrcuZdsBZwKrUM2c/RfbP5d0EHBc2fVJ4MO253Yk8ibN33KrXofQkq1um9/rECJijGnkjv5cqrVfa30R+HRJf3BieQ9wD7Cj7W2BU4Cz2hRnREQ0qZGHsdeVGbEvKAbWKK/XBB4odX9WU+dGYJPWQ4yIiFaMKgVCjaOAqyX9O9Wngn+oU+cw4KqhDpAUCBER3dHsw9gPA0eX9AdHU02o+gtJO1M19MfV2RdICoSIiG5p9o5+MnBkef1t4OyBDZK2Le/fYfvh1sJrvzzMjIjlTbN39A8AO5bXuwB3AEiaAFwKvM/27a2HFxERrWpkeOWFwE7A
upIWUCUu+yBwmqSVgKcpfe1UI3DWAf6rJDVbYntSB+KOiIgGNTLq5sAhNv1tnbofAD7QalAREdE+mRkbEdHnOtbQS9pN0m8k3SlpSqfOExERw2t21M2wyuLgZwBvBRYAv5A03favO3G+4WwzbZtun7Kj5k2e1+sQImKM6dQd/fbAnbbvtv0M8C1g7w6dKyIihtGphn5j4P6a9wtKWUREdFmnGvp6C5X4BRWkD0maJWnW4sWLOxRGRER0qqFfAIyveb8JJfHZgKRAiIjojo48jAV+AWwuaTPgd8ABwHs6dK5h5eFlRCzvOtLQ214i6SPA1cCKwDm2b+3EuSIiYniduqPH9pU0sIB4RER0VmbGRkT0uTT0ERF9rqWGXtIWkubUfD0u6ShJa0uaIemO8n2tdgUcERGj01Ifve3fANvBX9Ie/A64DJgCzLQ9teS5mcIwq011zMlrdv2UHXfyY72OICLGmHZ23ewK3GX7Pqp0B9NK+TRgnzaeJyIiRqGdDf0BwIXl9fq2FwKU7+sNrpyZsRER3dGWhl7SysBeVOvHNiQzYyMiuqNdd/TvAGbbfrC8f1DShgDl+6I2nSciIkapXROmDuT5bhuA6cBkYGr5/t02nWd08uAyIqL1O3pJq1ItMHJpTfFU4K2S7ijbprZ6noiIaE7Ld/S2nwLWGVT2MNUonIiI6LHMjI2I6HNp6CMi+tyIDb2kcyQtkvSrmrKLatIe3CtpTs22bSXdIOlWSfMkrdKp4CMiYmSN9NGfC5wOnDdQYPvdA68lfRl4rLxeCTgfeJ/tuZLWAZ5tZ8D1TJxyRadPscy4d+o7ex1CRIwxIzb0tq+TNLHeNkkC/gnYpRS9DbjF9tyy78PtCTMiIprVah/9m4AHbd9R3r8KsKSrJc2WdOxQOyYFQkREd7Ta0A+eKLUSsANwUPm+r6S6wyyTAiEiojuabuhLf/y7gItqihcA19p+qIyvvxJ4XWshRkREK1qZMPUW4DbbC2rKrgaOLbNlnwF2BL7SwjkakgeUERFDa2R45YXADcAWkhZIOqxsqk1LDIDtPwCnAr8A5lAlOlt+hsRERCyDGhl1c+AQ5QcPUX4+1RDLiIhYBsh2r2NA0mLgvhYPsy7wUBvCWdblOvvH8nBAVBqzAAADlUlEQVSNkOvspE1tjziaZZlo6NtB0izbk3odR6flOvvH8nCNkOtcFiTXTUREn0tDHxHR5/qpoT+r1wF0Sa6zfywP1wi5zp7rmz76iIior5/u6CMioo409BERfW7MNvSS1pY0Q9Id5ftaQ9T7vqRHJV3e7RibJWk3Sb+RdKekKXW2v7gs/nKnpJuGSiO9rGvgOt9csqAukbRfL2Jshwau8+OSfi3pFkkzJW3aizhb1cB1Hl4WI5oj6XpJr+5FnK0a6Tpr6u0nyZJ6P+TS9pj8Ar4ITCmvpwBfGKLersCewOW9jrnB61oRuAt4ObAyMBd49aA6/wKcWV4fAFzU67g7dJ0TgW2pFr3Zr9cxd/A6dwZWLa8/3Me/zzVqXu8FfL/XcXfiOku91YHrgBuBSb2Oe8ze0QN7A9PK62nAPvUq2Z4JPNGtoNpge+BO23fbfgb4FtW11qq99u8Au5ZFYMaSEa/T9r22bwGe60WAbdLIdf7IVbZXqBqGTbocYzs0cp2P17x9KTAWR4I08v8T4BSqm9GnuxncUMZyQ7++7YUA5ft6PY6nXTYG7q95v6CU1a1jewnVUo7rdCW69mnkOvvBaK/zMOCqjkbUGQ1dp6QjJN1F1Qh+rEuxtdOI1ynptcB428tMd3EraYo7TtIPgA3qbDqh27F0Ub0788F3Po3UWdb1wzU0ouHrlPReYBJVeu+xpqHrtH0GcIak9wCfAiZ3OrA2G/Y6Ja1AlZr94G4F1IhluqG3/Zahtkl6UNKGthdK2hBY1MXQOmkBML7m/SbAA0PUWVAWgFkTeKQ74bVNI9fZDxq6TklvobqB2dH2n7sUWzuN9vf5LeC/OxpRZ4x0
nasDWwM/Lr2pGwDTJe1le1bXohxkLHfdTOf5u4HJwHd7GEs7/QLYXNJmklametg6fVCd2mvfD/ihyxOgMaSR6+wHI15n+aj/NWAv22P1hqWR69y85u07gTsYe4a9TtuP2V7X9kTbE6meufS0kR8IbEx+UfVJz6T6xzITWLuUTwLOrqn3E2Ax8Ceqv8Zv73XsDVzb7sDtVE/3Tyhln6H6BwOwCvBt4E7g58DLex1zh67z9eV39kfgYeDWXsfcoev8AfAg1WI9c4DpvY65Q9d5GnBrucYfAX/T65g7cZ2D6v6YZWDUTVIgRET0ubHcdRMREQ1IQx8R0efS0EdE9Lk09BERfS4NfUREn0tDHxHR59LQR0T0uf8P+KuZlRAfFYIAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Coarse search: choose the Lasso regularisation strength by cross-validation\n",
    "# over a logarithmic grid of candidates\n",
    "lasso = LassoCV(alphas = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000],\n",
    "                max_iter = 5000)\n",
    "lasso.fit(X_train, y_train)\n",
    "alpha = lasso.alpha_\n",
    "print(\"Best alpha :\", alpha)\n",
    "\n",
    "# Fine search: evenly spaced multiples (0.60, 0.65, ..., 1.40) of the coarse optimum,\n",
    "# evaluated with 10-fold cross-validation\n",
    "print(\"Try again for more precision with alphas centered around \" + str(alpha))\n",
    "multipliers = [.6, .65, .7, .75, .8, .85, .9, .95, 1, 1.05,\n",
    "               1.1, 1.15, 1.2, 1.25, 1.3, 1.35, 1.4]\n",
    "lasso = LassoCV(alphas = [alpha * m for m in multipliers],\n",
    "                max_iter = 50000, cv = 10)\n",
    "lasso.fit(X_train, y_train)\n",
    "alpha = lasso.alpha_\n",
    "print(\"Best alpha :\", alpha)\n",
    "\n",
    "# mse_path_ has shape (n_alphas, n_folds): average over the folds (axis 1) to get\n",
    "# one cross-validated MSE per candidate alpha. The smallest value is the CV error\n",
    "# of the selected alpha. Averaging over axis 0 would instead give one value per\n",
    "# fold, and its minimum would be the error of the easiest fold.\n",
    "mse_cv = np.mean(lasso.mse_path_, axis = 1)\n",
    "rmse_cv = np.sqrt(mse_cv)\n",
    "print(\"cv of rmse :\", min(rmse_cv))\n",
    "\n",
    "# Training error of the refitted model\n",
    "y_train_lasso = lasso.predict(X_train)\n",
    "rmse_train = np.sqrt(mean_squared_error(y_train, y_train_lasso))\n",
    "print(\"RMSE on Training set :\", rmse_train)\n",
    "\n",
    "# Count the features kept (non-zero coefficient) and dropped (zero coefficient)\n",
    "coefs = pd.Series(lasso.coef_, index = X_train.columns)\n",
    "n_kept = (coefs != 0).sum()\n",
    "n_zero = (coefs == 0).sum()\n",
    "print(\"Lasso picked \" + str(n_kept) + \" features and eliminated the other \" + str(n_zero) + \" features\")\n",
    "\n",
    "# Plot the 10 most negative and the 10 most positive coefficients\n",
    "coefs_sorted = coefs.sort_values()\n",
    "imp_coefs = pd.concat([coefs_sorted.head(10), coefs_sorted.tail(10)])\n",
    "imp_coefs.plot(kind = \"barh\")\n",
    "plt.title(\"Coefficients in the Lasso Model\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1459 entries, 0 to 1458\n",
      "Data columns (total 2 columns):\n",
      "Id           1459 non-null int64\n",
      "SalePrice    1459 non-null float64\n",
      "dtypes: float64(1), int64(1)\n",
      "memory usage: 22.9 KB\n"
     ]
    }
   ],
   "source": [
    "# Predict on the test set, then undo the target standardisation so the\n",
    "# predictions are back on the original SalePrice scale\n",
    "y_test_pred = lasso.predict(X_test)\n",
    "y_test_pred = y_test_pred * std_y +  mean_y\n",
    "\n",
    "# Build the submission table: each test Id next to its predicted SalePrice.\n",
    "# The predictions reuse test_Id's index so the two columns line up row by row.\n",
    "y = pd.Series(data = y_test_pred, name = 'SalePrice', index = test_Id.index)\n",
    "df = pd.concat([test_Id, y], axis = 1)\n",
    "\n",
    "# index = False: write only the Id and SalePrice columns; without it to_csv also\n",
    "# writes the DataFrame's row index as an extra unnamed first column\n",
    "df.to_csv('submission.csv', index = False)\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
